{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "# Preprocessing of the CPDB network\n",
    "In this notebook, I want to collect the protein-protein-interaction (PPI) network from ConsensusPathDB and preprocess it.\n",
    "The preprocessing contains the following steps:\n",
    "* Filter out \"complex\" interactions (more than two partners)\n",
    "* Filter out interactions with score < 0.5\n",
    "* Map the strange uniprot gene names to ENSEMBL IDs (either protein IDs or gene IDs)\n",
    "* Convert the resulting edgelist to an adjacency matrix\n",
    "* Export the edgelist to .sif (can be read by cytoscape directly)\n",
    "\n",
    "I downloaded the CPDB network from [here](http://cpdb.molgen.mpg.de/download/ConsensusPathDB_human_PPI.gz) on the 29th of January 2019."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import mygene\n",
    "import h5py\n",
    "import networkx as nx\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "def save_sif(edgelist, outfile):\n",
    "    print (\"Saving edgelist to {}\".format(outfile))\n",
    "    with open(outfile, 'w') as out:\n",
    "        for idx, row in edgelist.iterrows():\n",
    "            out.write('{} (interacts) {}\\n'.format(row.partner1, row.partner2))\n",
    "    print (\".sif successfully saved!\")\n",
    "\n",
    "\n",
    "def get_gene_symbols(list_of_ensembl_ids):\n",
    "    # get Ensembl IDs for gene names\n",
    "    mg = mygene.MyGeneInfo()\n",
    "    res = mg.querymany(list_of_ensembl_ids,\n",
    "                       scopes='ensembl.gene',\n",
    "                       fields='symbol',\n",
    "                       species='human', returnall=True\n",
    "                      )\n",
    "\n",
    "    def get_symbol_and_ensembl(d):\n",
    "        if 'symbol' in d:\n",
    "            return [d['query'], d['symbol']]\n",
    "        else:\n",
    "            return [d['query'], None]\n",
    "\n",
    "    node_names = [get_symbol_and_ensembl(d) for d in res['out']]\n",
    "    # now, retrieve the names and IDs from a dictionary and put in DF\n",
    "    node_names = pd.DataFrame(node_names, columns=['Ensembl_ID', 'Symbol']).set_index('Ensembl_ID')\n",
    "    node_names.dropna(axis=0, inplace=True)\n",
    "    return node_names\n",
    "\n",
    "def get_gene_symbols_from_proteins(list_of_ensembl_ids):\n",
    "    # get Ensembl IDs for gene names\n",
    "    mg = mygene.MyGeneInfo()\n",
    "    res = mg.querymany(list_of_ensembl_ids,\n",
    "                       scopes='ensembl.protein',\n",
    "                       fields='symbol',\n",
    "                       species='human', returnall=True\n",
    "                      )\n",
    "\n",
    "    def get_symbol_and_ensembl(d):\n",
    "        if 'symbol' in d:\n",
    "            return [d['query'], d['symbol']]\n",
    "        else:\n",
    "            return [d['query'], None]\n",
    "\n",
    "    node_names = [get_symbol_and_ensembl(d) for d in res['out']]\n",
    "    # now, retrieve the names and IDs from a dictionary and put in DF\n",
    "    node_names = pd.DataFrame(node_names, columns=['Ensembl_ID', 'Symbol']).set_index('Ensembl_ID')\n",
    "    node_names.dropna(axis=0, inplace=True)\n",
    "    return node_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>#  source_databases</th>\n",
       "      <th>interaction_publications</th>\n",
       "      <th>interaction_participants</th>\n",
       "      <th>interaction_confidence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Reactome,Biogrid,Spike,HPRD,PhosphoPOINT</td>\n",
       "      <td>1660465,16713569,12547834</td>\n",
       "      <td>ANF_HUMAN,ANPRA_HUMAN</td>\n",
       "      <td>0.342234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>PhosphoPOINT,HPRD,Reactome,Biogrid</td>\n",
       "      <td>1660465,12709393,1672777,1309330</td>\n",
       "      <td>ANPRB_HUMAN,ANFC_HUMAN</td>\n",
       "      <td>0.119181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>DIP,Reactome,IntAct</td>\n",
       "      <td>22210847,17486119,21408196,16906149,19897728</td>\n",
       "      <td>STIM1_HUMAN,TRPC1_HUMAN</td>\n",
       "      <td>0.998166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>Reactome,HPRD</td>\n",
       "      <td>11591728</td>\n",
       "      <td>NOS1_HUMAN,AT2B4_HUMAN</td>\n",
       "      <td>0.694108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>CORUM,HPRD,BIND,Reactome,IntAct,MIPS-MPPI</td>\n",
       "      <td>10093054,9831708,23587463</td>\n",
       "      <td>ABCC9_HUMAN,KCJ11_HUMAN</td>\n",
       "      <td>0.999997</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                          #  source_databases  \\\n",
       "8    Reactome,Biogrid,Spike,HPRD,PhosphoPOINT   \n",
       "9          PhosphoPOINT,HPRD,Reactome,Biogrid   \n",
       "24                        DIP,Reactome,IntAct   \n",
       "25                              Reactome,HPRD   \n",
       "26  CORUM,HPRD,BIND,Reactome,IntAct,MIPS-MPPI   \n",
       "\n",
       "                        interaction_publications interaction_participants  \\\n",
       "8                      1660465,16713569,12547834    ANF_HUMAN,ANPRA_HUMAN   \n",
       "9               1660465,12709393,1672777,1309330   ANPRB_HUMAN,ANFC_HUMAN   \n",
       "24  22210847,17486119,21408196,16906149,19897728  STIM1_HUMAN,TRPC1_HUMAN   \n",
       "25                                      11591728   NOS1_HUMAN,AT2B4_HUMAN   \n",
       "26                     10093054,9831708,23587463  ABCC9_HUMAN,KCJ11_HUMAN   \n",
       "\n",
       "    interaction_confidence  \n",
       "8                 0.342234  \n",
       "9                 0.119181  \n",
       "24                0.998166  \n",
       "25                0.694108  \n",
       "26                0.999997  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "interactions = pd.read_csv('../data/networks/ConsensusPathDB_human_PPI.29012019.gz',\n",
    "                           compression='gzip',\n",
    "                           header=1,\n",
    "                           sep='\\t',\n",
    "                           encoding='utf8'\n",
    "                          )\n",
    "interactions_nona = interactions.dropna()\n",
    "interactions_nona.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(372507, 4)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA1MAAAHwCAYAAABDrzX1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3X3cbed8J/7PV4J4DpIqEQ4VlLRFg3ToFEEjWtHfj5ZRT5NKRxk11YdoTakyg7ajTNEiilRFpB7SiknjuToecpCKBC+nhBzRSiWCqET4zh97HbY79zlnnytnn/u+z3m/X6/9ylrXutba373vlfven3Otde3q7gAAALBrrrHWBQAAAGxEwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgBYqqo6t6rus9Z1rCdV9Y2quu1uOtbvVtUrp+VNVdVVtf9uOvatplr32x3HA9jbCFMAG0BVnV9V91+w73uq6leWXdN2nvvVVfWc+bbuvnN3v2cJz3Xnqvr7qrqkqr5aVR+pqmN29/PsYk33qarvTgHkG1W1tapOqaq7z/fr7ut392cXONbWnT1nd/+P7t4tP++V51l3f2Gq9Tu74/gAexthCoAfsIFGIf42yZlJbpbkh5I8JcnXducTDI7wXNjd109ygyRHJvlUkn+oqqN2Z23JcH0A7CbCFMAGU1WPq6r3V9UfT6Myn6uqB03bnpvkp5P82TQy8mdT+x2r6syquriqPl1Vvzh3vFdX1cuq6vSquizJfavqwVX1sar6WlVdUFXPWlHDvavq/04jQhdMNR2f5FFJfnt67r+d+n5vtKOqrl1Vf1pVF06PP62qa0/b7jON5Dytqr5cVV+qqsdv5z04KMltkryiu6+YHv/Y3e+f63NsVZ09vYZ/rqqjp/ZbVNVp03uxpaqeMLfPs6rq1Kr6q6r6WpLHVdU1quqE6RhfmUaabrKzn1PPbO3u30/yyiTPn3uerqrbTcvHVNV5VfX1qvpiVf1mVV0vyduT3GJulOsW26nvWVX1Vyue/j9P7++XquppK37Wz5lb/97oV1WdlORWSf52er7fXnnZ4ALv3SlV9drptZxbVUfs7H0C2MiEKYCN6Z5JPp3koCQvSHJiVVV3/16Sf0jy5OnyrCdPH8zPTPLXmY3gPDLJS6vqznPH+09JnpvZaMr7k1yW5DFJDkzy4CRPrKqHJrP7aDL7oP+/kxyc5C5Jzu7ulyd5XZIXTM/986vU/XuZjdbcJclPJLlHkmfMbf/hJDdKckiS45K8pKpuvMpxvpJkS5K/qqqHVtXN5jdW1T2SvDbJb02v4T8mOX/a/PokW5PcIsnDkvyPFaNGxyY5ddrvdZmNeD00yc9M+1yS5CWr1LQjb0pyt+lnsdKJSX61u2+Q5PAk7+ruy5I8KNMo1/S4cDv1rea+SQ5L8sAkJ9QCl4h296OTfCHJz0/P94JVuu3svXtIkpOn2k5L8mc7e16AjUyYAtiYPt/dr5juZXlNkptndrnban4uyfnd/ZfdfWV3fzTJ32T2YXibt04jO9/t7m9193u6+5xp/eOZfYj+manvo5K8o7tf393f7u6vdPfZC9b9qCTP7u4vd/dFSf4gyaPntn972v7t7j49yTeS3GHlQbq7MwsM5yf5kyRfqqr3VdVhU5fjkryqu8+cXsMXu/tTVXVoknsn+Z3pdZ6d2ajRfA0f6O63TPv9e5JfTfJ70yjT5UmeleRhu3iJ3YVJKrOQsdK3k9ypqm7Y3ZdMP58dWVnfav6guy/r7nOS/GVmAfpqWfC9e393nz6dlydlFpgB9lrCFMDG9C/bFrr7m9Pi9bfT99ZJ7jldkvfVqvpqZqHmh+f6XDC/Q1Xds6reXVUXVdWlSf5LZqNgSXJokn8erPsWST4/t/75qW2br3T3lXPr38x2XtcUbp7c3T+S2Wu8LLPRqB3VeIskF3f311fUcMjc+gU/
uEtuneTNc+/dJ5N8J9sPr6s5JEkn+eoq2/7/JMck+XxVvbeqfmonx1pZ3876rHyPRy3y3v3L3PI3kxywi6ETYEMRpgD2Pr1i/YIk7+3uA+ce1+/uJ+5gn7/O7DKtQ7v7Rkn+PLORlW3H+5EFn3ulCzMLJ9vcamq7Wrr7gswuvTt8atpejRcmuUlV3WBFDV+cP9yKfS5I8qAV798B3f3FLO4Xknx0unxvZe1ndfexmV2C+ZYkp2ynju3Vt5pD55bn3+PLklx3btt8oN7ZsRd57wD2KcIUwN7nX5PMf4fR3yW5fVU9uqquOT3uXlU/uoNj3CCzUYhvTfcf/ae5ba9Lcv+q+sWq2r+qblpVd9nOc6/0+iTPqKqDp0kkfj/JyskTdqqqblxVf1BVt5smiDgoyX9O8sGpy4lJHl9VR03bD6mqO06h6/8m+Z9VdUBV/XhmlwRu796jZBYkn1tVt56e++CqOnaBGmt63mcm+ZUkv7tKn2tV1aOq6kbd/e3MZiPcNg35vya5aVXdaJH3ZIX/XlXXne6Le3ySN0ztZyc5pqpuUlU/nOSpK/bb7s9v8L0D2KsJUwB7nxdldk/PJVX14umyrAcmeURmowv/ktnMctfewTF+Lcmzq+rrmQWebaMl6e4vZHZZ2tOSXJzZB/Rt98acmNn9P1+tqresctznJNmc5ONJzkny0altV12RZFOSd2QWQD6R5PIkj5tq/HBmIeKFSS5N8t58f0TskdO+FyZ5c5JndveZO3iuF2U2Svf30/vxwcwmANmeW1TVNzK73+usJD+W5D7d/ffb6f/oJOdPs/P9lyS/PL2GT2UWPj87vZ+7cqneezOboOOdSf547rlPSvJPmd1r9vf5fsja5n9mFna/WlW/ucpxd/W9A9ir1eweXgAAAHaFkSkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABiwz30r+UEHHdSbNm1a6zIAAIB16iMf+ci/dffBO+u3z4WpTZs2ZfPmzWtdBgAAsE5V1ecX6ecyPwAAgAHCFAAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGDA/mtdAAAAsO/ZdMLbvrd8/vMevIaVjDMyBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYIUwAAAAOEKQAAgAHCFAAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYsNUxV1X+rqnOr6hNV9fqqOqCqblNVH6qqz1TVG6rqWlPfa0/rW6btm+aO8/Sp/dNV9bNz7UdPbVuq6oRlvhYAAIB5SwtTVXVIkqckOaK7D0+yX5JHJHl+khd292FJLkly3LTLcUku6e7bJXnh1C9VdadpvzsnOTrJS6tqv6raL8lLkjwoyZ2SPHLqCwAAsHTLvsxv/yTXqar9k1w3yZeS3C/JqdP21yR56LR87LSeaftRVVVT+8ndfXl3fy7JliT3mB5buvuz3X1FkpOnvgAAAEu3tDDV3V9M8sdJvpBZiLo0yUeSfLW7r5y6bU1yyLR8SJILpn2vnPrfdL59xT7bawcAAFi6ZV7md+PMRopuk+QWSa6X2SV5K/W2XbazbVfbV6vl+KraXFWbL7roop2VDgAAsFPLvMzv/kk+190Xdfe3k7wpyX9IcuB02V+S3DLJhdPy1iSH
Jsm0/UZJLp5vX7HP9tqvortf3t1HdPcRBx988O54bQAAwD5umWHqC0mOrKrrTvc+HZXkvCTvTvKwqc9jk7x1Wj5tWs+0/V3d3VP7I6bZ/m6T5LAkH05yVpLDptkBr5XZJBWnLfH1AAAAfM/+O+8yprs/VFWnJvlokiuTfCzJy5O8LcnJVfWcqe3EaZcTk5xUVVsyG5F6xHScc6vqlMyC2JVJntTd30mSqnpykjMymynwVd197rJeDwAAwLylhakk6e5nJnnmiubPZjYT38q+30ry8O0c57lJnrtK++lJTr/6lQIAAOyaZU+NDgAAsFcSpgAAAAYIUwAAAAOEKQAAgAHCFAAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYIUwAAAAOEKQAAgAHCFAAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYIUwAAAAOEKQAAgAHCFAAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYIUwAAAAOEKQAAgAHCFAAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAAAABiw1TFXVgVV1alV9qqo+WVU/VVU3qaozq+oz039vPPWtqnpxVW2pqo9X1d3mjvPYqf9nquqxc+0/WVXnTPu8uKpqma8HAABgm2WPTL0oyf/p7jsm+Ykkn0xyQpJ3dvdhSd45rSfJg5IcNj2OT/KyJKmqmyR5ZpJ7JrlHkmduC2BTn+Pn9jt6ya8HAAAgyRLDVFXdMMl/THJiknT3Fd391STHJnnN1O01SR46LR+b5LU988EkB1bVzZP8bJIzu/vi7r4kyZlJjp623bC7P9DdneS1c8cCAABYqmWOTN02yUVJ/rKqPlZVr6yq6yW5WXd/KUmm//7Q1P+QJBfM7b91attR+9ZV2gEAAJZumWFq/yR3S/Ky7r5rksvy/Uv6VrPa/U490H7VA1cdX1Wbq2rzRRddtOOqAQAAFrDMMLU1ydbu/tC0fmpm4epfp0v0Mv33y3P9D53b/5ZJLtxJ+y1Xab+K7n55dx/R3UccfPDBV+tFAQAAJEsMU939L0kuqKo7TE1HJTkvyWlJts3I99gkb52WT0vymGlWvyOTXDpdBnhGkgdW1Y2niScemOSMadvXq+rIaRa/x8wdCwAAYKn2X/Lx/2uS11XVtZJ8NsnjMwtwp1TVcUm+kOThU9/TkxyTZEuSb059090XV9UfJjlr6vfs7r54Wn5iklcnuU6St08PAACApVtqmOrus5Mcscqmo1bp20metJ3jvCrJq1Zp35zk8KtZJgAAwC5b9vdMAQAA7JWEKQAAgAHCFAAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYIUwAAAAOEKQAAgAHCFAAAwICFwlRVHb7sQgAAADaSRUem/ryqPlxVv1ZVBy61IgAAgA1goTDV3fdO8qgkhybZXFV/XVUPWGplAAAA69jC90x192eSPCPJ
7yT5mSQvrqpPVdX/t6ziAAAA1qtF75n68ap6YZJPJrlfkp/v7h+dll+4xPoAAADWpf0X7PdnSV6R5He7+9+3NXb3hVX1jKVUBgAAsI4tGqaOSfLv3f2dJKmqayQ5oLu/2d0nLa26fcymE972veXzn/fgNawEAADYmUXvmXpHkuvMrV93agMAANgnLRqmDujub2xbmZavu5ySAAAA1r9Fw9RlVXW3bStV9ZNJ/n0H/QEAAPZqi94z9dQkb6yqC6f1myf5peWUBAAAsP4tFKa6+6yqumOSOySpJJ/q7m8vtTIAAIB1bNGRqSS5e5JN0z53rap092uXUhUAAMA6t1CYqqqTkvxIkrOTfGdq7iTCFAAAsE9adGTqiCR36u5eZjEAAAAbxaKz+X0iyQ8vsxAAAICNZNGRqYOSnFdVH05y+bbG7n7IUqoCAABY5xYNU89aZhEAAAAbzaJTo7+3qm6d5LDufkdVXTfJfsstDQAAYP1a6J6pqnpCklOT/MXUdEiStyyrKAAAgPVu0QkonpTkXkm+liTd/ZkkP7SsogAAANa7RcPU5d19xbaVqto/s++ZAgAA2CctGqbeW1W/m+Q6VfWAJG9M8rfLKwsAAGB9WzRMnZDkoiTnJPnVJKcnecayigIAAFjvFp3N77tJXjE9AAAA9nkLhamq+lxWuUequ2+72ysCAADYABb90t4j5pYPSPLwJDfZ/eUAAABsDAvdM9XdX5l7fLG7/zTJ/ZZcGwAAwLq16GV+d5tbvUZmI1U3WEpFAAAAG8Cil/n9ydzylUnOT/KLu70aAACADWLR2fzuu+xCAAAANpJFL/P7jR1t7+7/tXvKAQAA2Bh2ZTa/uyc5bVr/+STvS3LBMooCAABY7xYNUwcluVt3fz1JqupZSd7Y3b+yrMIAAADWs4WmRk9yqyRXzK1fkWTTbq8GAABgg1h0ZOqkJB+uqjcn6SS/kOS1S6sKAABgnVt0Nr/nVtXbk/z01PT47v7Y8soCAABY3xa9zC9Jrpvka939oiRbq+o2S6oJAABg3VsoTFXVM5P8TpKnT03XTPJXyyoKAABgvVt0ZOoXkjwkyWVJ0t0XJrnBsooCAABY7xYNU1d0d2c2+USq6nrLKwkAAGD9WzRMnVJVf5HkwKp6QpJ3JHnF8soCAABY3xadze+Pq+oBSb6W5A5Jfr+7z1xqZQAAAOvYTsNUVe2X5Izuvn8SAQoAACALXObX3d9J8s2qutEeqAcAAGBDWOgyvyTfSnJOVZ2ZaUa/JOnupyylKgAAgHVu0TD1tukBAABAdhKmqupW3f2F7n7NnioIAABgI9jZPVNv2bZQVX+z5FoAAAA2jJ2FqZpbvu0yCwEAANhIdhamejvLAAAA+7SdTUDxE1X1tcxGqK4zLWda7+6+4VKrAwAAWKd2GKa6e789VQgAAMBGstMv7QUAAOCqhCkAAIABwhQAAMCApYepqtqvqj5WVX83rd+mqj5UVZ+pqjdU1bWm9mtP61um7ZvmjvH0qf3TVfWzc+1HT21bquqEZb8WAACAbfbEyNSvJ/nk3Przk7ywuw9LckmS46b245Jc0t23S/LCqV+q6k5JHpHkzkmOTvLSKaDtl+QlSR6U5E5JHjn1BQAAWLqlhqmqumWSByd55bReSe6X5NSpy2uSPHRaPnZaz7T9qKn/sUlO7u7Lu/tzSbYkucf02NLdn+3uK5KcPPUFAABYumWPTP1pkt9O8t1p/aZJvtrdV07rW5McMi0fkuSCJJm2Xzr1/177in22134VVXV8VW2uqs0XXXTR1X1NAAAAywtTVfVzSb7c3R+Zb16la+9k2662X7Wx++XdfUR3H3HwwQfvoGoAAIDF7PBLe6+meyV5SFUdk+SAJDfMbKTqwKrafxp9umWSC6f+W5McmmRrVe2f5EZJLp5r32Z+n+21AwAALNXSRqa6++ndfcvu3pTZBBLv6u5HJXl3kodN3R6b5K3T8mnTeqbt7+runtofMc32d5skhyX5
cJKzkhw2zQ54rek5TlvW6wEAAJi3zJGp7fmdJCdX1XOSfCzJiVP7iUlOqqotmY1IPSJJuvvcqjolyXlJrkzypO7+TpJU1ZOTnJFkvySv6u5z9+grAQAA9ll7JEx193uSvGda/mxmM/Gt7POtJA/fzv7PTfLcVdpPT3L6biwVAABgIXvie6YAAAD2OsIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYIUwAAAAOEKQAAgAHCFAAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYIUwAAAAOEKQAAgAHCFAAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYIUwAAAAOEKQAAgAHCFAAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMCApYWpqjq0qt5dVZ+sqnOr6ten9ptU1ZlV9Znpvzee2quqXlxVW6rq41V1t7ljPXbq/5mqeuxc+09W1TnTPi+uqlrW6wEAAJi3zJGpK5M8rbt/NMmRSZ5UVXdKckKSd3b3YUneOa0nyYOSHDY9jk/ysmQWvpI8M8k9k9wjyTO3BbCpz/Fz+x29xNcDAADwPUsLU939pe7+6LT89SSfTHJIkmOTvGbq9pokD52Wj03y2p75YJIDq+rmSX42yZndfXF3X5LkzCRHT9tu2N0f6O5O8tq5YwEAACzVHrlnqqo2Jblrkg8luVl3fymZBa4kPzR1OyTJBXO7bZ3adtS+dZV2AACApVt6mKqq6yf5myRP7e6v7ajrKm090L5aDcdX1eaq2nzRRRftrGQAAICdWmqYqqprZhakXtfdb5qa/3W6RC/Tf788tW9Ncujc7rdMcuFO2m+5SvtVdPfLu/uI7j7i4IMPvnovCgAAIMudza+SnJjkk939v+Y2nZZk24x8j03y1rn2x0yz+h2Z5NLpMsAzkjywqm48TTzxwCRnTNu+XlVHTs/1mLljAQAALNX+Szz2vZI8Osk5VXX21Pa7SZ6X5JSqOi7JF5I8fNp2epJjkmxJ8s0kj0+S7r64qv4wyVlTv2d398XT8hOTvDrJdZK8fXoAAAAs3dLCVHe/P6vf15QkR63Sv5M8aTvHelWSV63SvjnJ4VejTAAAgCF7ZDY/AACAvY0wBQAAMECYAgAAGCBMAQAADBCmAAAABghTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYIUwAAAAOEKQAAgAH7r3UBAADA3m/TCW9b6xJ2OyNTAAAAA4QpAACAAcIUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADhCkAAIABwhQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAbsv9YFsLpNJ7ztB9bPf96D16gSAABgNUamAAAABghTAAAAA4QpAACAAcIUAADAABNQAAAAS7FyUrW9jZEpAACAAcIUAADAAGEKAABggDAFAAAwwAQUAADAkJUTTJz/vAev
USVrQ5gCAAB2i7199r6VhCkAAGBh+1pg2hH3TAEAAAwQpgAAAAYIUwAAAAOEKQAAgAHCFAAAwABhCgAAYIAwBQAAMMD3TAEAANvle6W2z8gUAADAAGEKAABggDAFAAAwQJgCAAAYIEwBAAAMEKYAAAAGCFMAAAADfM8UAADwA3y31GKMTAEAAAwwMrUXmP+Xg/Of9+A1rAQAgI3CZ8irT5jagAy7AgCwO/l8OUaY2iCc4AAA7MyORpt8ntz9hCkAANgLCU/LZwIKAACAAcIUAADAAJf57WVWDueamQUAYO/i8r31Q5gCAIA9TCDaOwhTAACwC8yYxzbCFAAA+4RlBB3had8mTO3l1voeql15/h39MnLvF8Dut+iHQL+D2agEHZZNmNrH7GhYehnPsSeO6Y88wPdt1H9597t87S36GUFAge+r7l7rGq6Wqjo6yYuS7Jfkld39vB31P+KII3rz5s17pLZdtda/nHbXH7K1fh074o81e9qe+AcMxqz1yP3VsZ5/z65ne8u9LXvL64B56+13cFV9pLuP2Fm/DT0yVVX7JXlJkgck2ZrkrKo6rbvPW9vKNqZ94ZfxrrzG+f+pN/KHLna/0f9XnEf7Bjems2zOI1g/NnSYSnKPJFu6+7NJUlUnJzk2iTDF1bajP1b+kLE7OI/2vF0JsLvj5+NnvL74eQC720YPU4ckuWBufWuSe65RLQCsc/6RBIDdaaOHqVql7So3gVXV8UmOn1a/UVWfXmpVizsoyb+tdRFsOM4bRjhvGOG8YYTzhl1Wz193582tF+m00cPU1iSHzq3fMsmFKzt198uTvHxPFbWoqtq8yI1tMM95wwjnDSOcN4xw3jBio54311jrAq6ms5IcVlW3qaprJXlEktPWuCYAAGAfsKFHprr7yqp6cpIzMpsa/VXdfe4alwUAAOwDNnSYSpLuPj3J6Wtdx6B1d+khG4LzhhHOG0Y4bxjhvGHEhjxvNvyX9gIAAKyFjX7PFAAAwJoQpvaAqjq6qj5dVVuq6oRVtl+7qt4wbf9QVW3a81Wy3ixw3vxGVZ1XVR+vqndW1UJTeLJ329l5M9fvYVXVVbXhZk5i91vkvKmqX5x+55xbVX+9p2tk/Vng79StqurdVfWx6W/VMWv/IC79AAAHVklEQVRRJ+tHVb2qqr5cVZ/YzvaqqhdP59THq+pue7rGXSVMLVlV7ZfkJUkelOROSR5ZVXda0e24JJd09+2SvDDJ8/dslaw3C543H0tyRHf/eJJTk7xgz1bJerPgeZOqukGSpyT50J6tkPVokfOmqg5L8vQk9+ruOyd56h4vlHVlwd83z0hySnffNbMZl1+6Z6tkHXp1kqN3sP1BSQ6bHscnedkeqOlqEaaW7x5JtnT3Z7v7iiQnJzl2RZ9jk7xmWj41yVFVtdoXErPv2Ol5093v7u5vTqsfzOx71ti3LfL7Jkn+MLPw/a09WRzr1iLnzROSvKS7L0mS7v7yHq6R9WeR86aT3HBavlFW+S5Q9i3d/b4kF++gy7FJXtszH0xyYFXdfM9UN0aYWr5Dklwwt751alu1T3dfmeTSJDfdI9WxXi1y3sw7Lsnbl1oRG8FOz5uqumuSQ7v77/ZkYaxri/y+uX2S21fVP1bVB6tqR/+yzL5hkfPmWUl+uaq2Zjbz8n/dM6Wxge3q5581t+GnRt8AVhthWjmF4iJ92LcsfE5U1S8nOSLJzyy1IjaCHZ43VXWNzC4lftyeKogNYZHfN/tndtnNfTIbBf+Hqjq8u7+65NpYvxY5bx6Z5NXd/SdV9VNJTprOm+8uvzw2qA33mdjI1PJtTXLo3Potc9Vh7u/1qar9MxsK39EQKHu/Rc6bVNX9k/xekod09+V7qDbWr52dNzdIcniS91TV+UmOTHKaSSj2eYv+nXprd3+7uz+X5NOZhSv2XYucN8clOSVJuvsDSQ5IctAeqY6NaqHPP+uJMLV8ZyU5rKpuU1XXyuwGzNNW9DktyWOn5YcleVf7
ArB93U7Pm+lyrb/ILEi5f4FkJ+dNd1/a3Qd196bu3pTZvXYP6e7Na1Mu68Qif6fekuS+SVJVB2V22d9n92iVrDeLnDdfSHJUklTVj2YWpi7ao1Wy0ZyW5DHTrH5HJrm0u7+01kXtiMv8lqy7r6yqJyc5I8l+SV7V3edW1bOTbO7u05KcmNnQ95bMRqQesXYVsx4seN78UZLrJ3njNF/JF7r7IWtWNGtuwfMGfsCC580ZSR5YVecl+U6S3+rur6xd1ay1Bc+bpyV5RVX9t8wu1Xqcfyzet1XV6zO7XPig6V66Zya5ZpJ0959ndm/dMUm2JPlmksevTaWLK+c0AADArnOZHwAAwABhCgAAYIAwBQAAMECYAgAAGCBMAQAADBCmAFiaqvrGAn2eWlXXXXIdB1bVr82t36KqTt1Nx/65qvpYVf1TVZ1XVb+6O44LwPpnanQAlqaqvtHd199Jn/OTHNHd/7YLx92vu7+zC/03Jfm77j580X0WPO41k3w+yT26e2tVXTvJpu7+9NU4ZmX29/m7u6tOAJbDyBQAS1dV96mq91TVqVX1qap63fQN909Jcosk766qd099H1hVH6iqj1bVG6vq+lP7+VX1+1X1/iQPr6onVNVZ04jQ32wb3aqqm1XVm6f2f6qq/5DkeUl+pKrOrqo/qqpNVfWJqf8BVfWXVXXONMJ036n9cVX1pqr6P1X1map6wSov7QZJ9k/ylSTp7su3Bant1JGq+o2q+sT0eOrUtqmqPllVL03y0SSHbu99AGD9EKYA2FPumuSpSe6U5LZJ7tXdL05yYZL7dvd9q+qgJM9Icv/uvluSzUl+Y+4Y3+rue3f3yUne1N137+6fSPLJJMdNfV6c5L1T+92SnJvkhCT/3N136e7fWlHXk5Kku38sySOTvKaqDpi23SXJLyX5sSS/VFWHzu/Y3RcnOS3J56vq9VX1qKra9rf1KnVU1U8meXySeyY5MskTququU/87JHltd981yWU7eR8AWAf2X+sCANhnfLi7tyZJVZ2dZFOS96/oc2RmYesfZ1e75VpJPjC3/Q1zy4dX1XOSHJjk+knOmNrvl+QxSTJdCnhpVd14B3XdO8n/nvp/qqo+n+T207Z3dvelU83nJbl1kgvmd+7uX6mqH0ty/yS/meQBSR63nTruneTN3X3ZdMw3JfnpTIGsuz+44PsAwDogTAGwp1w+t/ydrP43qJKc2d2P3M4xLptbfnWSh3b3P1XV45LcZ7Cu2sG2RWpOd5+T5JyqOinJ5zILU7v6XPOvbWfvAwDrgMv8AFhrX8/s3qMk+WCSe1XV7ZKkqq5bVbffzn43SPKlaRKIR821vzPJE6f996uqG654jpXet23/6blulWShCSSq6vpVdZ+5prtkNiHF9up4X5KHTq/rekl+Ick/rHLoXXkfAFgjwhQAa+3lSd5eVe/u7osyG9V5fVV9PLNQccft7Pffk3woyZlJPjXX/utJ7ltV5yT5SJI7d/dXMrtk7hNV9UcrjvPSJPtN/d+Q5HHdfXkWU0l+u6o+PV26+Af5/qjUanV8NLMRtQ9Ptb+yuz+28qC7+D4AsEZMjQ4AADDAyBQAAMAAYQoAAGCAMAUAADBAmAIAABggTAEAAAwQpgAAAAYIUwAAAAOEKQAAgAH/D9fbPWtQ+EetAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1008x576 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "bins = np.linspace(0, 1, 200)\n",
    "fig = plt.figure(figsize=(14, 8))\n",
    "ax = plt.hist(interactions_nona.interaction_confidence, bins)\n",
    "plt.xlabel('Interaction Score')\n",
    "plt.ylabel('Frequency')\n",
    "plt.title('Interaction Score Distribution')\n",
    "interactions_nona.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "### Remove \"complex\" interactions & low confidence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>partner1</th>\n",
       "      <th>partner2</th>\n",
       "      <th>confidence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ANF_HUMAN</td>\n",
       "      <td>ANPRA_HUMAN</td>\n",
       "      <td>0.342234</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ANPRB_HUMAN</td>\n",
       "      <td>ANFC_HUMAN</td>\n",
       "      <td>0.119181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>STIM1_HUMAN</td>\n",
       "      <td>TRPC1_HUMAN</td>\n",
       "      <td>0.998166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NOS1_HUMAN</td>\n",
       "      <td>AT2B4_HUMAN</td>\n",
       "      <td>0.694108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ABCC9_HUMAN</td>\n",
       "      <td>KCJ11_HUMAN</td>\n",
       "      <td>0.999997</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      partner1     partner2  confidence\n",
       "0    ANF_HUMAN  ANPRA_HUMAN    0.342234\n",
       "1  ANPRB_HUMAN   ANFC_HUMAN    0.119181\n",
       "2  STIM1_HUMAN  TRPC1_HUMAN    0.998166\n",
       "3   NOS1_HUMAN  AT2B4_HUMAN    0.694108\n",
       "4  ABCC9_HUMAN  KCJ11_HUMAN    0.999997"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# select interactions with exactly two partners\n",
    "binary_inter = interactions_nona[interactions_nona.interaction_participants.str.count(',') == 1]\n",
    "# split the interactions columns into interaction partners\n",
    "edgelist = pd.concat([binary_inter.interaction_participants.str.split(',', expand=True),\n",
    "                                binary_inter.interaction_confidence], axis=1\n",
    "                              )\n",
    "# make the dataframe beautiful\n",
    "edgelist.set_index([np.arange(edgelist.shape[0])], inplace=True)\n",
    "edgelist.columns = ['partner1', 'partner2', 'confidence']\n",
    "edgelist.to_csv('../data/networks/CPDB_uni_edgelist.tsv', sep='\\t')\n",
    "\n",
    "# select interactions with confidence score above threshold\n",
    "high_conf_edgelist = edgelist[edgelist.confidence > .5]\n",
    "\n",
    "edgelist.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((397053, 4), (372507, 4), (372507, 3), (247746, 3))"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "interactions.shape, interactions_nona.shape, edgelist.shape, high_conf_edgelist.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "### Map from Uniprot Name to Ensembl ID\n",
    "This is not simple at all. We have to write both columns with the interaction partners to file and then use the uniprot webservice (http://www.uniprot.org/mapping/) to do the mapping. The resulting csv files then have to be read again to form the correct edgelist for us."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# write involved uniprot names to file\n",
    "uniprot_names = edgelist.partner1.append(edgelist.partner2).unique()\n",
    "np.savetxt('../data/networks/uniprot_names.txt', uniprot_names, fmt='%s')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "We were unable to map 0 source and 0 target genes.\n",
      "We lost 0 interactions this way.\n",
      "Final edge list has 305196 interactions\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>partner1</th>\n",
       "      <th>partner2</th>\n",
       "      <th>confidence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ENSG00000167323</td>\n",
       "      <td>ENSG00000144935</td>\n",
       "      <td>0.998166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ENSG00000089250</td>\n",
       "      <td>ENSG00000058668</td>\n",
       "      <td>0.694108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ENSG00000069431</td>\n",
       "      <td>ENSG00000187486</td>\n",
       "      <td>0.999997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>ENSG00000084754</td>\n",
       "      <td>ENSG00000138029</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>ENSG00000197265</td>\n",
       "      <td>ENSG00000153767</td>\n",
       "      <td>0.999969</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          partner1         partner2  confidence\n",
       "2  ENSG00000167323  ENSG00000144935    0.998166\n",
       "3  ENSG00000089250  ENSG00000058668    0.694108\n",
       "4  ENSG00000069431  ENSG00000187486    0.999997\n",
       "5  ENSG00000084754  ENSG00000138029    1.000000\n",
       "6  ENSG00000197265  ENSG00000153767    0.999969"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The UniProt -> Ensembl mapping was built manually with the UniProt webservice :-(\n",
    "# Only one column name is passed; presumably the file has two columns, in which case\n",
    "# pandas uses the leading one (the UniProt name) as the index -- TODO confirm.\n",
    "mapping = pd.read_csv('../data/networks/uniprot_ensg_mapping.tsv',\n",
    "                      sep='\\t',\n",
    "                      header=0,\n",
    "                      names=['ensembl']\n",
    "                     )\n",
    "\n",
    "# Attach the Ensembl ID of each partner. The frame can grow because one UniProt name\n",
    "# may map to several Ensembl genes, and every such mapping yields its own interaction.\n",
    "# Left joins keep unmapped partners as NaN so the statistics below can count them;\n",
    "# an inner join would discard those rows before they are counted.\n",
    "p1_incl = high_conf_edgelist.join(mapping, on='partner1', how='left', rsuffix='_p1')\n",
    "both_incl = p1_incl.join(mapping, on='partner2', how='left', rsuffix='_p2')\n",
    "both_incl.columns = ['partner1', 'partner2', 'confidence', 'partner1_ensembl', 'partner2_ensembl']\n",
    "\n",
    "# collect statistics on how many partners / interactions cannot be mapped\n",
    "num_unmaps = both_incl[both_incl.partner1_ensembl.isnull() | both_incl.partner2_ensembl.isnull()].shape[0]\n",
    "num_p1_unmaps = p1_incl[p1_incl.ensembl.isnull()].partner1.unique().shape[0]\n",
    "num_p2_unmaps = both_incl[both_incl.partner2_ensembl.isnull()].partner2.unique().shape[0]\n",
    "print (\"We were unable to map {} source and {} target genes.\".format(num_p1_unmaps, num_p2_unmaps))\n",
    "print (\"We lost {} interactions this way.\".format(num_unmaps))\n",
    "\n",
    "# Kick out interactions with an unmapped partner, replace the UniProt names by the\n",
    "# Ensembl IDs, put the confidence last and sort by the original row number.\n",
    "# Built as one non-mutating chain, so re-running the cell gives the same result and\n",
    "# no frame derived from another one is modified in place.\n",
    "final_edgelist = (\n",
    "    both_incl\n",
    "    .dropna(axis=0)\n",
    "    .loc[:, ['partner1_ensembl', 'partner2_ensembl', 'confidence']]\n",
    "    .rename(columns={'partner1_ensembl': 'partner1', 'partner2_ensembl': 'partner2'})\n",
    "    .sort_index()\n",
    ")\n",
    "print (\"Final edge list has {} interactions\".format(final_edgelist.shape[0]))\n",
    "\n",
    "# write to file (tab-separated edgelist and cytoscape .sif) and look at the first rows\n",
    "final_edgelist.to_csv('../data/networks/CPDB_ensg_edgelist.tsv', sep='\\t')\n",
    "save_sif(final_edgelist, '../data/networks/CPDB_ensg_edgelist.sif')\n",
    "final_edgelist.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Map from Ensembl IDs to Hugo Symbols\n",
    "In addition to the edgelists with UniProt and Ensembl IDs, I also want an edgelist that uses gene names (regular HUGO symbols).\n",
    "\n",
    "**I derive it here using the *mygene* webservice**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "querying 1-1000...done.\n",
      "querying 1001-2000...done.\n",
      "querying 2001-3000...done.\n",
      "querying 3001-4000...done.\n",
      "querying 4001-5000...done.\n",
      "querying 5001-6000...done.\n",
      "querying 6001-7000...done.\n",
      "querying 7001-8000...done.\n",
      "querying 8001-9000...done.\n",
      "querying 9001-10000...done.\n",
      "querying 10001-11000...done.\n",
      "querying 11001-12000...done.\n",
      "querying 12001-13000...done.\n",
      "querying 13001-14000...done.\n",
      "querying 14001-14956...done.\n",
      "Finished.\n",
      "2 input query terms found no hit:\n",
      "\t['ENSG00000243444', 'ENSG00000189144']\n",
      "We were unable to map 0 source and 0 target genes.\n",
      "We lost 0 interactions this way.\n",
      "Final edge list has 305186 interactions\n",
      "Dropping 52719 interactions because the are redundant for gene names\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>partner1</th>\n",
       "      <th>partner2</th>\n",
       "      <th>confidence</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>STIM1</td>\n",
       "      <td>TRPC1</td>\n",
       "      <td>0.998166</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NOS1</td>\n",
       "      <td>ATP2B4</td>\n",
       "      <td>0.694108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ABCC9</td>\n",
       "      <td>KCNJ11</td>\n",
       "      <td>0.999997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>HADHA</td>\n",
       "      <td>HADHB</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>GTF2E2</td>\n",
       "      <td>GTF2E1</td>\n",
       "      <td>0.999969</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  partner1 partner2  confidence\n",
       "2    STIM1    TRPC1    0.998166\n",
       "3     NOS1   ATP2B4    0.694108\n",
       "4    ABCC9   KCNJ11    0.999997\n",
       "5    HADHA    HADHB    1.000000\n",
       "6   GTF2E2   GTF2E1    0.999969"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Collect every Ensembl ID that occurs as either partner and translate them to gene\n",
    "# symbols with the mygene webservice.\n",
    "# pd.concat is used because Series.append was removed in pandas 2.0.\n",
    "ens_names = pd.concat([final_edgelist.partner1, final_edgelist.partner2]).unique()\n",
    "ens_to_symbol = get_gene_symbols(ens_names)\n",
    "\n",
    "# Attach the symbol of each partner. Left joins keep partners without a symbol as NaN\n",
    "# so the statistics below can count them; an inner join would discard IDs that are\n",
    "# missing from the mapping before they are counted.\n",
    "p1_incl = final_edgelist.join(ens_to_symbol, on='partner1', how='left', rsuffix='_p1')\n",
    "both_incl = p1_incl.join(ens_to_symbol, on='partner2', how='left', rsuffix='_p2')\n",
    "both_incl.columns = ['partner1', 'partner2', 'confidence', 'partner1_symbol', 'partner2_symbol']\n",
    "\n",
    "# collect statistics on how many partners / interactions cannot be mapped\n",
    "num_unmaps = both_incl[both_incl.partner1_symbol.isnull() | both_incl.partner2_symbol.isnull()].shape[0]\n",
    "num_p1_unmaps = p1_incl[p1_incl.Symbol.isnull()].partner1.unique().shape[0]\n",
    "num_p2_unmaps = both_incl[both_incl.partner2_symbol.isnull()].partner2.unique().shape[0]\n",
    "print (\"We were unable to map {} source and {} target genes.\".format(num_p1_unmaps, num_p2_unmaps))\n",
    "print (\"We lost {} interactions this way.\".format(num_unmaps))\n",
    "\n",
    "# Kick out interactions with an unmapped partner, replace the Ensembl IDs by the\n",
    "# symbols, put the confidence last and sort by the original row number.\n",
    "# Built as one non-mutating chain so that re-running the cell is idempotent.\n",
    "final_edgelist_symbols = (\n",
    "    both_incl\n",
    "    .dropna(axis=0)\n",
    "    .loc[:, ['partner1_symbol', 'partner2_symbol', 'confidence']]\n",
    "    .rename(columns={'partner1_symbol': 'partner1', 'partner2_symbol': 'partner2'})\n",
    "    .sort_index()\n",
    ")\n",
    "print (\"Final edge list has {} interactions\".format(final_edgelist_symbols.shape[0]))\n",
    "\n",
    "# remove duplicated interactions (can happen due to multiple Ensembl IDs mapping to the same gene name)\n",
    "no_interactions = final_edgelist_symbols.shape[0]\n",
    "final_edgelist_symbols = final_edgelist_symbols.drop_duplicates()\n",
    "print (\"Dropping {} interactions because the are redundant for gene names\".format(no_interactions - final_edgelist_symbols.shape[0]))\n",
    "\n",
    "# write to file and look at the first rows\n",
    "final_edgelist_symbols.to_csv('../data/networks/CPDB_symbols_edgelist.tsv', sep='\\t')\n",
    "final_edgelist_symbols.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "### Construct Adjacency Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Constructed Adjacency Matrix with average node degree of: 37.03022300469483\n",
      "Adjacency matrix has 252398 edges and 13632 nodes in total\n"
     ]
    }
   ],
   "source": [
    "# Undirected graph over gene symbols; the confidence is kept as an edge attribute.\n",
    "G = nx.from_pandas_edgelist(\n",
    "    df=final_edgelist_symbols,\n",
    "    source='partner1',\n",
    "    target='partner2',\n",
    "    edge_attr='confidence'\n",
    ")\n",
    "\n",
    "# Dense node x node adjacency as a DataFrame. to_pandas_adjacency reads the edge\n",
    "# attribute 'weight' by default, which these edges do not have, so every edge is\n",
    "# stored as 1.0 (binary adjacency) rather than as its confidence.\n",
    "adj_pd = nx.to_pandas_adjacency(G)\n",
    "node_names = adj_pd.index.values\n",
    "adjacency_matrix = adj_pd.values\n",
    "\n",
    "# summary statistics of the resulting network\n",
    "avg_node_degree = np.mean([degree for _, degree in G.degree()])\n",
    "print(\"Constructed Adjacency Matrix with average node degree of: {}\".format(avg_node_degree))\n",
    "print(\"Adjacency matrix has {} edges and {} nodes in total\".format(G.number_of_edges(), G.number_of_nodes()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Text(0.5, 1.0, 'Node Degree Distribution of Genes in PPI')"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA00AAAHwCAYAAAB6yISuAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xu8bWVdL/7PV/CKGCBgCiKWZFknzYOiWd4rNRWP6UkxIyKx8yOjrLzlCU27eEpNy+MRRUUTL3kDDVND0axELqJ4ySA1IFBQEFELRb6/P+ZYOt2sNdbcmzXXmmvt9/v1Wq81xzPGHOO7nj3YrM9+nvHM6u4AAACwvBtsdAEAAACLTGgCAAAYITQBAACMEJoAAABGCE0AAAAjhCYAAIARQhPAGqqqZ1XVX290HYuqqt5VVYev0bl+uqo+M7X9+ap64FqcezjfJ6vqvmt1vhmvWVX1qqq6oqo+sp7XXktV9YyqesVG1wGwVoQmYKc2/KL9xarabart16rqtA2q5T+r6qqq+kpV/VNV/XpVbYq/q6uqq+rrVfW1qvpyVZ1aVb84fUx3P7i7T5jxXHcYO6a7/6G773h96x6u9+qqeu425//R7j5tLc6/HX4qyc8k2b+7777cAVV166p6eVVdPPT1Z4f6f3h9S11Zd/9xd//ajrx3+Fm+Ofxsl1fVe5d+tuEfJb417Fv6b+Sew75fqaoPreXPAbBkU/yPGGDOdk1yzEYXMXhYd++e5HZJ/jTJU5McP48LVdUuczjtnbv75knumOTVSf6qqo5d64tU1a5rfc4Fcbskn+/ury+3s6pumeSfktwsyU8n2T3JXZN8IJOwtVX8n+E+2j/JpZncS0veOOzbJ8mHkry1qmr9SwR2JkITQPJnSX63qvZYbmdV/WRVnVFVVw7ff3Jq3+2r6gPD6NB7k+y9zXvvMfxr+Feq6mOzTvfq7iu7++Qkv5jk8Kr6seF8N66qP6+qC4YRsv9XVTedut5TquqSYRTi16ZHbIZ/wX9pVZ1SVV9Pcr8ZzvfQqjpn6l/1f3zG+r/U3a9N8r+SPH34ZT9VdVpV/drw+g5D311ZVV+qqjcO7R8cTvOxYUThF6vqvlV1UVU9taq+kORVS23bXPpuVfWpYXrbq6rqJsM5rzMKsdQ3VXVUksclecpwvXcM+78z3W/op78Y+vXi4fWNh31Ltf1OVV069P8RK/VNVd2mqk4eRlHOr6onDO1HJnlFknsOdTx7mbf/dpKvJnl8d/9bT3ylu1/V3X85dY0V77vhz+A5VfWPw337nqrae8b3/kpNRrauqqrPVdXjVvgZvzNNtaoOHPr68OE++1JV/f5K/TOtu7+R5MQkP7bMvm8lOSHJ9ye55SznA9hRQhNAcmaS05L87rY7qmqvJH+b5MWZ/GL2giR/uxQCMvmF7qxMwtJzkhw+9d79hvc+N8lew/nfUlX7zFpYd38kyUWZjCokyfOS/FCSuyS5Q5L9kvzBcL0HJXlykgcO++6zzCkPS/JHmYxQfGiV8901ySuTPHH42V+W5OSlsDCjkzIZyVtuqtlzkrwnyZ6ZjCj85fAz33vYf+fuvnl3v3HY/v5M+vF2SY5a4XqPS/JzSX5w+LmeuVqB3X1cktdlGN3o7octc9jvJ7lHJv105+HnmT739yf5vkz678gkL6mqPVe45Osz+TO9TZJHJfnjqnpAdx+f5NeT/PNQx3IjdA9M8rbuvnaln2fG++6wJEck2TfJjYZjRt9bkymsL07y4GE09CeTnLNSHcv4qUxGIB+Q5A+q6kdWe0NV3TyTP9OPLrPvxkl+JclF3f2l7agDYLsJTQATf5DkScsEmp9Pcl53v7a7r+nu1yf5lyQPq6oDktwtyf/u7qu7+4NJ3jH13l9Kckp3n9Ld13b3ezMJaA/ZztouTrJXVVWSJyT57e6+vLuvSvLHSR4zHPc/k7yquz85/Av9ciMV
J3X3Pw6/dF+9yvmekORl3X16d397eBbp6kzCw0yG0YAvZfIL+La+lUkAuk13/1d3r/Y8yrVJjh36+j9XOOavuvvC7r48k3D42FlrXcXjkvxhd1/a3Zdl0rePn9r/rWH/t7r7lCRfyyQgfI+qum0m4eGpw898TiajS4/f9tgV7J3kC1Pne/gwInRVVb1naJ7lvntVd//r0I9vyiQMzvLea5P8WFXdtLsv6e5Pzlh3kjy7u/+zuz+W5GOZhM+V/G5VfSXJ+Ulunkk4WvI/h30XJvnvSR6xHTUA7BChCSBJd38iyTuTPG2bXbdJ8u/btP17JiMKt0lyxTbPn0wfe7skjx5+qf3K8IveTyW59XaWt1+SyzN5huNmSc6aOt/fDe1LtV449b4Lc13Tbaud73ZJfmeb+m87XGcmVXXD4XyXL7P7KUkqyUdqslLdr65yusu6+79WOWb65/v37al1FdveB9ue+8vdfc3U9jcy+WV/ufMsBdTpc+03Yx1fztT9090nd/cemUzbu9HQPMt994Wp19O1rvje4T7/xUxGwy6pqr+t7Vt8YqVrLufPu3uP7v7+7n54d//b1L43Dfv27e77d/dZ21EDwA7Zqg/SAuyIY5OcneT5U20XZ/KL5LQDMgkXlyTZs6p2mwpOByTp4fWFSV7b3U/Y0YKq6m6Z/EL9oUxGbP4zyY92938sc/glmUxzW3LbZY7pqderne/CJH/U3X+0I7UPDk1yTZLrLJ/d3V/IZDQrVfVTSf6+qj7Y3eevcK5eoX3a9M98QCZ/fkny9UwCYobrff92nnvpPlgaWZk+9/ZYGjXcfSo4HZBkuf5fzqlJHlFVzx6Zond97rvR93b3u5O8uybPvT03ycvz3amjAFuWkSaAwfDL+huT/OZU8ylJfqiqDquqXWuyhPadkryzu/89k6lLz66qGw2/+E8/D/PXmUzj+7mq2qWqbjIsGjAdbJZVVbeoqocmeUOSv+7uc4dfkl+e5IVVte9w3H5V9XPD296U5Iiq+pGqulmGZ5NGft7VzvfyJL9eVYfUxG5V9fNVtfsM9e81LBLwkiTP6+4vL3PMo6f64opMgsu3h+0vJvmB1a6zjKOrav/hWbRnZPLnmUymg/1oVd2lJotDPGub9612vdcneebwbM/emfTtdn8eV3dfmMnqd38y3A8/nskzUK+b8RQvyOQZsNdW1Q8Ofy6757vT65Lrcd+NvbeqbjVMB9wtk2maX8t3/7wAtjShCeB7/WGS73xm0/DL/kOT/E4mU6OekuShUw+eH5bkkEymnx2b5DVT770wk5GWZyS5LJN/xf+9jP/d+46qumo49vcz+SV5eiW2p2bynMeHq+qrSf4+w7Mz3f2uTB7Uf/9wzD8P77l65Hpj5zszk5Ggv8ok1Jyf7322ZDkfq6qvDcf+WibPS60U3u6W5PTh+JOTHNPdnxv2PSvJCcMUsf+5yjWnnZjJ4hKfHb6eO/ws/5rJn+3fJzkvk5G7accnudNwvbcvc97nZhKQP57k3ExGJJ+7zHGzeGySAzMZdXpbJs9pvXeWNw733T2S/FcmP8NVmSzGsHsmKxXu6H23dP6x994gk/8OLs7kfr9Pkv9vlroBNrvqnmW2AwCbzbA62SeS3Hib520AgO1gpAlgC6mq/zFMFdwzk+XE3yEwAcD1IzQBbC1PzGRa1b9l8rzJ/9rYcgBg8zM9DwAAYISRJgAAgBFCEwAAwIgt+eG2e++9dx944IEbXQYAALDAzjrrrC919z6rHbclQ9OBBx6YM888c6PLAAAAFlhV/fssx5meBwAAMEJoAgAAGCE0AQAAjBCaAAAARghNAAAAI4QmAACAEUITAADACKEJAABghNAEAAAwQmgCAAAYITQBAACMEJoAAABGCE0AAAAj5haaquqVVXVpVX1iqm2vqnpvVZ03fN9zaK+qenFVnV9VH6+qu0695/Dh+POq6vB51QsAALCceY40vTrJg7Zpe1qSU7v7oCSnDttJ8uAk
Bw1fRyV5aTIJWUmOTXJIkrsnOXYpaAEAAKyHuYWm7v5gksu3aT40yQnD6xOSPGKq/TU98eEke1TVrZP8XJL3dvfl3X1FkvfmukEMAABgbtb7maZbdfclSTJ833do3y/JhVPHXTS0rdQOAACwLhZlIYhapq1H2q97gqqjqurMqjrzsssuW9PiAACAndd6h6YvDtPuMny/dGi/KMltp47bP8nFI+3X0d3HdffB3X3wPvvss+aFAwAAO6f1Dk0nJ1laAe/wJCdNtf/ysIrePZJcOUzfe3eSn62qPYcFIH52aNtUTjz9gpx4+gUbXQYAALADdp3Xiavq9Unum2Tvqrook1Xw/jTJm6rqyCQXJHn0cPgpSR6S5Pwk30hyRJJ09+VV9ZwkZwzH/WF3b7u4BAAAwNzMLTR192NX2PWAZY7tJEevcJ5XJnnlGpYGAAAws0VZCAIAAGAhCU0AAAAjhCYAAIARQhMAAMAIoQkAAGCE0AQAADBCaAIAABghNAEAAIwQmgAAAEYITQAAACOEJgAAgBFCEwAAwAihCQAAYITQBAAAMEJoAgAAGCE0AQAAjBCaAAAARghNAAAAI4QmAACAEUITAADACKEJAABghNAEAAAwQmgCAAAYITQBAACMEJoAAABGCE0AAAAjhCYAAIARQhMAAMAIoQkAAGCE0AQAADBCaAIAABghNAEAAIwQmgAAAEYITQAAACOEJgAAgBFCEwAAwAihCQAAYITQBAAAMEJoAgAAGCE0AQAAjBCaAAAARghNAAAAI4QmAACAEUITAADACKEJAABghNAEAAAwQmgCAAAYITQBAACMEJoAAABGCE0AAAAjhCYAAIARQhMAAMAIoQkAAGCE0AQAADBCaAIAABghNAEAAIwQmgAAAEYITQAAACOEJgAAgBFCEwAAwAihCQAAYITQBAAAMEJoAgAAGCE0AQAAjBCaAAAARghNAAAAI4QmAACAEUITAADACKEJAABghNAEAAAwQmgCAAAYITQBAACMEJoAAABGCE0AAAAjNiQ0VdVvV9Unq+oTVfX6qrpJVd2+qk6vqvOq6o1VdaPh2BsP2+cP+w/ciJoBAICd07qHpqraL8lvJjm4u38syS5JHpPkeUle2N0HJbkiyZHDW45MckV33yHJC4fjAAAA1sVGTc/bNclNq2rXJDdLckmS+yd587D/hCSPGF4fOmxn2P+Aqqp1rBUAANiJrXto6u7/SPLnSS7IJCxdmeSsJF/p7muGwy5Kst/wer8kFw7vvWY4/pbrWTMAALDz2ojpeXtmMnp0+yS3SbJbkgcvc2gvvWVk3/R5j6qqM6vqzMsuu2ytygUAAHZyGzE974FJPtfdl3X3t5K8NclPJtljmK6XJPsnuXh4fVGS2ybJsP/7kly+7Um7+7juPri7D95nn33m/TMAAAA7iY0ITRckuUdV3Wx4NukBST6V5P1JHjUcc3iSk4bXJw/bGfa/r7uvM9IEAAAwDxvxTNPpmSzocHaSc4cajkvy1CRPrqrzM3lm6fjhLccnueXQ/uQkT1vvmgEAgJ3Xrqsfsva6+9gkx27T/Nkkd1/m2P9K8uj1qAsAAGBbG7XkOAAAwKYgNAEAAIwQmgAAAEYITQAAACOEJgAAgBFCEwAAwAihCQAAYITQBAAAMEJoAgAAGCE0AQAAjBCaAAAARghNAAAAI4QmAACAEUITAADACKEJAABghNAEAAAwQmgCAAAYITQBAACMEJoAAABGCE0AAAAjhCYAAIARQhMAAMAIoWkdnXj6BRtdAgAAsJ1mCk1VdbuqeuDw+qZVtft8ywIAAFgMq4amqnpCkjcnednQtH+St8+zKAAAgEUxy0jT0UnuleSrSdLd5yXZd55FAQAALIpZQtPV3f3NpY2q2jVJz68kAACAxTFLaPpAVT0jyU2r6meS/E2Sd8y3LAAAgMUwS2h6WpLLkpyb5IlJTknyzHkWBQAAsCh2Xe2A7r62qv46yQe7+zPrUBMAAMDCmGX1
vIcnOSfJ3w3bd6mqk+ddGAAAwCKYZXresUnunuQrSdLd5yQ5cI41AQAALIxZQtM13X3l3CsBAABYQKs+05TkE1V1WJJdquqgJL+Z5J/mWxYAAMBimGWk6UlJfjTJ1UlOTHJlkt+aZ1EAAACLYnSkqap2SfLs7v69JL+/PiUBAAAsjtGRpu7+dpL/vk61AAAALJxZnmn66LDE+N8k+fpSY3e/dW5VAQAALIhZQtNeSb6c5P5TbZ1EaAIAALa8VUNTdx+xHoUAAAAsolVDU1W9eJnmK5Oc2d0nrX1JAAAAi2OWJcdvkuQuSc4bvn48kyl7R1bVX8yxNgAAgA03yzNNd0hy/+6+Jkmq6qVJ3pPkZ5KcO8faAAAANtwsI037Jdltanu3JLcZliO/ei5VAQAALIhZRpr+T5Jzquq0JJXk3kn+uKp2S/L3c6wNAABgw82yet7xVXVKkrtnEpqe0d0XD7t/b57FAQAAbLRVp+dVVSV5QJI7d/fbk+xaVXefe2UAAAALYJZnmv5vknsmeeywfVWSl8ytIgAAgAUyyzNNh3T3Xavqo0nS3VdU1Y3mXBcAAMBCmGWk6VtVtUuSTpKq2ifJtXOtCgAAYEHMEppenORtSfatqj9K8qEkfzzXqgAAABbELKvnva6qzspkMYhK8oju/vTcKwMAAFgAK4amqtpravPSJK+f3tfdl8+zMAAAgEUwNtJ0VibPMVWSA5JcMbzeI8kFSW4/9+oAAAA22IrPNHX37bv7B5K8O8nDunvv7r5lkocmeet6FQgAALCRZlkI4m7dfcrSRne/K8l95lcSAADA4pjlc5q+VFXPTPLXmUzX+6UkX55rVQAAAAtilpGmxybZJ5Nlx982vH7sPIsCAABYFLMsOX55kmPWoRYAAICFM8tIEwAAwE5LaAIAABghNAEAAIxYNTRV1Q9V1alV9Ylh+8eH1fQAAAC2vFlGml6e5OlJvpUk3f3xJI+ZZ1EAAACLYpbQdLPu/sg2bdfMoxgAAIBFM0to+lJV/WAmH2ybqnpUkkvmWhUAAMCCWPVzmpIcneS4JD9cVf+R5HNJfmmuVQEAACyIWT7c9rNJHlhVuyW5QXdfNf+yAAAAFsOKoamqnrxCe5Kku18wp5oAAAAWxthI0+7D9zsmuVuSk4fthyX54DyLAgAAWBQrhqbufnaSVNV7ktx1aVpeVT0ryd+sS3UAAAAbbJbV8w5I8s2p7W8mOXAu1QAAACyYWVbPe22Sj1TV24btRyQ5YX4lAQAALI5ZVs/7o6p6V5KfzuSzmo7o7o/OvbIt6sTTL0iSHHbIARtcCQAAMItZpuclybeTXDv1db1U1R5V9eaq+peq+nRV3bOq9qqq91bVecP3PYdjq6peXFXnV9XHq+qu1/f6AAAAs1o1NFXVMUlel2TvJPsm+euqetL1vO6Lkvxdd/9wkjsn+XSSpyU5tbsPSnLqsJ0kD05y0PB1VJKXXs9rAwAAzGyWZ5qOTHJId389SarqeUn+Oclf7sgFq+oWSe6d5FeSpLu/meSbVXVokvsOh52Q5LQkT01yaJLXdHcn+fAwSnXr7r5kR64PAACwPWaZnleZTM9b8u2hbUf9QJLLkryqqj5aVa+oqt2S3GopCA3f9x2O3y/JhVPvv2hoAwAAmLtZRppeleT0bVbPO/56XvOuSZ7U3adX1Yvy3al4y1kuoPV1Dqo6KpPpezngAIssAAAAa2PVkabufkGSX01yeZIrMlk97y+uxzUvSnJRd58+bL85kxD1xaq6dZIM3y+dOv62U+/fP8nFy9R5XHcf3N0H77PPPtejPAAAgO+adfW8czIJN29L8uWq2uGhnO7+QpILq+qOQ9MDknwqyclJDh/aDk9y0vD65CS/PKyid48kV3qeCQAAWC+rTs8bVso7NskX893nmTrJj1+P6z4pyeuq6kZJPpvkiEwC3Juq6sgkFyR59HDsKUkekuT8JN8YjgUAAFgXszzTdEySO3b3l9fqot19TpKD
l9n1gGWO7SRHr9W1AQAAtscs0/MuTHLlvAsBAABYRLOMNH02yWlV9bdJrl5qHBaIAAAA2NJmCU0XDF83Gr5YAyeefkGS5LBDLI8OAACLbNXQ1N3PXo9CAAAAFtGsS44DAADslIQmAACAEUITAADAiFk+3HafJE9IcuD08d39q/MrCwAAYDHMsnreSUn+IcnfJ/n2fMsBAABYLLOEppt191PnXgkAAMACmuWZpndW1UPmXgkAAMACWnGkqaquStJJKskzqurqJN8atru7b7E+JQIAAGycFUNTd+++noUAAAAsolWn51XVqbO0AQAAbEVj0/NukmS3JHtX1Z6ZTMtLklskuc061AYAALDhxlbPe2KS38okIJ091f7VJC+ZZ1EAAACLYuyZphcleVFVPam7/3IdawIAAFgYY9Pz7t/d70vyH1X1yG33d/db51oZAADAAhibnnefJO9L8rBl9nUSoQkAANjyxqbnHTt8P2L9ygEAAFgsYyNNSZKq+rckH07yD0k+2N2fmntVAAAAC2LVz2lKcqckL0tyyyR/XlWfraq3zbcsAACAxTBLaPp2km8N369N8sUkl86zKAAAgEWx6vS8TD6X6dwkL0jy8u7+8nxLAgAAWByzjDQ9NskHk/x/Sd5QVc+uqgfMtywAAIDFsOpIU3eflOSkqvrhJA9O8ltJnpLkpnOuDQAAYMOtOtJUVW8ZVtB7UZLdkvxykj3nXRgAAMAimOWZpj9NcnZ3f3vexQAAACyaWabnnbEehQAAACyiWRaCAAAA2GkJTQAAACNmWQjiXlW12/D6l6rqBVV1u/mXBgAAsPFmGWl6aZJvVNWdM1lq/N+TvGauVQEAACyIWULTNd3dSQ5N8qLuflGS3edbFgAAwGKYZcnxq6rq6Uken+Snq2qXJDecb1kAAACLYZaRpl9McnWSX+3uLyTZL8mfzbUqAACABbFqaBqC0luS3Hho+lKSt82zKAAAgEUxy+p5T0jy5iQvG5r2S/L2eRYFAACwKGaZnnd0knsl+WqSdPd5SfadZ1EAAACLYpbQdHV3f3Npo6p2TdLzKwkAAGBxzBKaPlBVz0hy06r6mSR/k+Qd8y0LAABgMcwSmp6W5LIk5yZ5YpJTkjxznkUBAAAsilU/p6m7r03y8uELAABgp7JqaKqqeyV5VpLbDcdXku7uH5hvaQAAABtv1dCU5Pgkv53krCTfnm85AAAAi2WW0HRld79r7pXspE48/YIkyWGHHLDBlQAAAMuZJTS9v6r+LMlbk1y91NjdZ8+tKgAAgAUxS2g6ZPh+8FRbJ7n/2pcDAACwWGZZPe9+61EIAADAIlr1c5qq6lZVdXxVvWvYvlNVHTn/0gAAADbeLB9u++ok705ym2H7X5P81rwKAgAAWCSzhKa9u/tNSa5Nku6+JpYeBwAAdhKzhKavV9UtM1n8IVV1jyRXzrUqAACABTHL6nlPTnJykh+sqn9Msk+SR821KgAAgAUxy+p5Z1fVfZLcMUkl+Ux3f2vulQEAACyAVUNTVT1ym6Yfqqork5zb3ZfOpywAAIDFMMv0vCOT3DPJ+4ft+yb5cCbh6Q+7+7Vzqg0AAGDDzRKark3yI939xWTyuU1JXprkkCQfTCI0AQAAW9Ysq+cduBSYBpcm+aHuvjyJZ5sAAIAtbZaRpn+oqncm+Zth+xeSfLCqdkvylblVBgAAsABmCU1HJ3lkkp/KZPW81yR5S3d3kvvNsTYAAIANN8uS453kLcMXAADATmWWZ5oAAAB2WkITAADAiBVDU1WdOnx/3vqVAwAAsFjGnmm6dVXdJ8nDq+oNmSwC8R3dffZcKwMAAFgAY6HpD5I8Lcn+SV6wzb5Ocv95FbUzOvH0C5Ikhx1ywAZXAgAATFsxNHX3m5O8uar+d3c/Zx1rAgAAWBizLDn+nKp6eJJ7D02ndfc751sWAADAYlh19byq+pMkxyT51PB1zNAGAACw5a060pTk55PcpbuvTZKqOiHJR5M8fZ6F
AQAALIJZP6dpj6nX3zePQgAAABbRLCNNf5Lko1X1/kyWHb93jDIBAAA7iVkWgnh9VZ2W5G6ZhKandvcX5l0YAADAIphlpCndfUmSk+dcCwAAwMKZ9ZmmNVdVu1TVR6vqncP27avq9Ko6r6reWFU3GtpvPGyfP+w/cKNqBgAAdj4bFpoyWcb801Pbz0vywu4+KMkVSY4c2o9MckV33yHJC4fjAAAA1sVoaKqqG1TVJ9b6olW1fyZLmb9i2K4k90/y5uGQE5I8Ynh96LCdYf8DhuMBAADmbjQ0DZ/N9LGqOmCNr/sXSZ6S5Nph+5ZJvtLd1wzbFyXZb3i9X5ILh3quSXLlcPz3qKqjqurMqjrzsssuW+NyAQCAndUsC0HcOsknq+ojSb6+1NjdD9+RC1bVQ5Nc2t1nVdV9l5qXObRn2Pfdhu7jkhyXJAcffPB19gMAAOyIWULTs9f4mvdK8vCqekiSmyS5RSYjT3tU1a7DaNL+SS4ejr8oyW2TXFRVu2by4bqXr3FNAAAAy1p1IYju/kCSzye54fD6jCRn7+gFu/vp3b1/dx+Y5DFJ3tfdj0vy/iSPGg47PMlJw+uTh+0M+9/X3UaSAACAdbFqaKqqJ2SyAMPLhqb9krx9DrU8NcmTq+r8TJ5ZOn5oPz7JLYf2Jyd52hyuDQAAsKxZpucdneTuSU5Pku4+r6r2XYuLd/dpSU4bXn92uM62x/xXkkevxfUAAAC21yyf03R1d39zaWN4rsj0OAAAYKcwS2j6QFU9I8lNq+pnkvxNknfMtywAAIDFMEtoelqSy5Kcm+SJSU5J8sx5FrUzO/H0Cza6BAAAYMqqzzR197VVdUImzzR1ks9YvQ4AANhZrBqaqurnk/y/JP+WyQfN3r6qntjd75p3cQAAABttltXznp/kft19fpJU1Q8m+dskQhMAALDlzfJM06VLgWnw2SSXzqkeAACAhbLiSFNVPXJ4+cmqOiXJmzJ5punRSc5Yh9oAAAA23Nj0vIdNvf5ikvsMry9LsufcKgIAAFggK4am7j5iPQsBAABYRLOsnnf7JE9KcuD08d398PmVBQAAsBhmWT3v7UmOT/KOJNfOtxwAAIDFMkto+q/ufvHcKwEAAFhAs4SmF1XVsUnek+QsaOFyAAASS0lEQVTqpcbuPntuVQEAACyIWULTf0vy+CT3z3en5/WwDQAAsKXNEpr+R5If6O5vzrsYAACARXODGY75WJI95l0IAADAIpplpOlWSf6lqs7I9z7TZMlxAABgy5slNB079yoAAAAW1Kqhqbs/sB6FAAAALKJVQ1NVXZXJanlJcqMkN0zy9e6+xTwLIznx9AuSJIcdcsAGVwIAADuvWUaadp/erqpHJLn73CoCAABYILOsnvc9uvvt8RlNAADATmKW6XmPnNq8QZKD893pegAAAFvaLKvnPWzq9TVJPp/k0LlUQ5LvPssEAABsvFmeaTpiPQoBAABYRCuGpqr6g5H3dXc/Zw71AAAALJSxkaavL9O2W5Ijk9wyidAEAABseSuGpu5+/tLrqto9yTFJjkjyhiTPX+l9AAAAW8noM01VtVeSJyd5XJITkty1u69Yj8IAAAAWwYqf01RVf5bkjCRXJflv3f0sgWljWVUPAADW39hI0+8kuTrJM5P8flUttVcmC0HcYs61MRCWAABg44w907TiKBQAAMDOQjACAAAYITQBAACMEJoAAABGCE0AAAAjhCYAAIARQhMAAMAIoQkAAGCE0AQAADBCaAIAABghNAEAAIwQmgAAAEYITZvciadfkBNPv2CjywAAgC1LaAIAABghNAEAAIwQmgAAAEbsutEFsGM8xwQAAOvDSBMAAMAIoQkAAGCE0AQAADBCaAIAABhhIYhNxgIQAACwvow0AQAAjBCaAAAARghNAAAAI4QmAACAEUITAADACKEJAABghNAEAAAwQmgCAAAYITQBAACMEJq2oBNPvyAnnn7BRpcBAABbgtAEAAAwQmgCAAAY
ITTtBEzXAwCAHSc0AQAAjBCaAAAARghNW4QpeAAAMB9CEwAAwIhdN7oA5sfIEwAAXH9GmgAAAEYITQAAACPWPTRV1W2r6v1V9emq+mRVHTO071VV762q84bvew7tVVUvrqrzq+rjVXXX9a4ZAADYeW3ESNM1SX6nu38kyT2SHF1Vd0rytCSndvdBSU4dtpPkwUkOGr6OSvLS9S8ZAADYWa17aOruS7r77OH1VUk+nWS/JIcmOWE47IQkjxheH5rkNT3x4SR7VNWt17lsAABgJ7WhzzRV1YFJfiLJ6Ulu1d2XJJNglWTf4bD9klw49baLhjaWMeuKeT7XCQAAZrNhoamqbp7kLUl+q7u/OnboMm29zPmOqqozq+rMyy67bK3KBAAAdnIbEpqq6oaZBKbXdfdbh+YvLk27G75fOrRflOS2U2/fP8nF256zu4/r7oO7++B99tlnfsUDAAA7lY1YPa+SHJ/k0939gqldJyc5fHh9eJKTptp/eVhF7x5JrlyaxgcAADBvu27ANe+V5PFJzq2qc4a2ZyT50yRvqqojk1yQ5NHDvlOSPCTJ+Um+keSI9S0XAADYma17aOruD2X555SS5AHLHN9Jjp5rUQAAACvY0NXzWBxW0wMAgOUJTQAAACOEJgAAgBFCEwAAwAihCQAAYITQBAAAMEJoAgAAGLERH27LBrKsOAAAbB8jTQAAACOEJgAAgBFCEwAAwAihCQAAYITQtBOxCAQAAGw/oYnrOPH0CwQsAAAYCE0AAAAjhCYAAIARPtx2J2caHgAAjDPSBAAAMEJo4ntMjzxZEAIAAIQmAACAUUITAADACKEJAABghNAEAAAwQmgCAAAYITSxKqvoAQCwMxOamJnlyAEA2BkJTQAAACN23egC2FyMLgEAsLMx0gQAADBCaAIAABghNAEAAIwQmrherKIHAMBWJzSx5gQpAAC2EqGJNSEoAQCwVQlNAAAAI4Qm5s4oFAAAm5nQxJoSjgAA2Gp23egC2LoEKAAAtgIjTQAAACOEJtaV55sAANhshCYAAIARQhMLwQgUAACLSmhi3WxvKBKkAABYBEITG0IgAgBgsxCaWCiCFAAAi0ZoAgAAGCE0AQAAjBCaWHiefwIAYCPtutEFsHMThgAAWHRGmtiyjFABALAWjDSxcK5v0BGUAABYS0aa2JSMIgEAsF6MNLFpLBeSltoOO+SA7TrP9hwPAMDOzUgTAADACCNNbCljU/ZM5wMAYEcITWwJa7V4xGGHHLBDU/4AANi6hCa2PCNMAABcH0ITOzWBCgCA1VgIAmZkmXMAgJ2TkSZ2StsTfgQlAICdm9AEKxCWAABITM+D7zFrUNp2qp6pewAAW5eRJlgA04HLUucAAIvFSBOsoeVGm4xCAQBsbkaa4HoYC0OCEgDA1iA0wQYSrAAAFp/QBNtpR4PO0vsOO+SANQ9L0+cGAGBtCU2wxlYLRLPuF4AAABaD0AQLbtuQJUwBAKwvq+fBJjP2+VA7+jlTO3r9tTgfAMCiE5pgE5olpKx0zEav+CdcAQCbjel5sKDmtVjEjl5rlhGtsQ/p3Z5rmIIIACwSoQm2mG3DyfaGpbUKLtcnJC0XvtYjUC23wqEABwAITcC6W6tRtJUWyRg7/zxCkIAFAFvbpglNVfWgJC9KskuSV3T3n25wSbClreezR9tzrbV4Jmu1kLPas2CrhbPVRspWG2Fb7rO8tjeQXd8gt2hBcNHqWUtb+WcD2Co2RWiqql2SvCTJzyS5KMkZVXVyd39qYysDNrNZflnd3sU01vr6s9Y0bfp82/MzjoXB7T3P9gTD5Y6ZpU92pP9MwQRgR2yK0JTk7knO7+7PJklVvSHJoUmEJtji1mNBjHlNF1yubUePmWXfWh8zdtz2BptZjtmR8LPaeWYZDZzXdM5Z+2itrFWwnfVa6/m84XLXn+Wa867t+v4Zr/c9Auy46u6NrmFVVfWoJA/q7l8bth+f5JDu/o3ljj/44IP7zDPPXM8S
R1liGWBzm+V5uc1u7GecZTXMefTR9pxze55pnPWY6/szbc8qotf3Hw1mqWO5ab/b07fbXn/s/cv9PNtT96x/DquF99X6fKWR9uWuP3bOWfp4rf6hYez913cGw46O3M+ztnmrqrO6++BVj9skoenRSX5um9B09+5+0tQxRyU5ati8Y5LPrHuh17V3ki9tdBFbnD6eP308f/p4/vTx/Onj+dPH86eP52/R+vh23b3Pagdtlul5FyW57dT2/kkunj6gu49Lctx6FrWaqjpzluTKjtPH86eP508fz58+nj99PH/6eP708fxt1j6+wUYXMKMzkhxUVbevqhsleUySkze4JgAAYCewKUaauvuaqvqNJO/OZMnxV3b3Jze4LAAAYCewKUJTknT3KUlO2eg6ttNCTRfcovTx/Onj+dPH86eP508fz58+nj99PH+bso83xUIQAAAAG2WzPNMEAACwIYSmOamqB1XVZ6rq/Kp62kbXs1VU1eer6tyqOqeqzhza9qqq91bVecP3PTe6zs2kql5ZVZdW1Sem2pbt05p48XBff7yq7rpxlW8eK/Txs6rqP4Z7+ZyqesjUvqcPffyZqvq5jal686iq21bV+6vq01X1yao6Zmh3H6+RkT52H6+RqrpJVX2kqj429PGzh/bbV9Xpw338xmFBrFTVjYft84f9B25k/ZvBSB+/uqo+N3Uf32Vo93fFDqqqXarqo1X1zmF709/HQtMcVNUuSV6S5MFJ7pTksVV1p42taku5X3ffZWq5yqclObW7D0py6rDN7F6d5EHbtK3Upw9OctDwdVSSl65TjZvdq3PdPk6SFw738l2G5zYz/F3xmCQ/Orzn/w5/p7Cya5L8Tnf/SJJ7JDl66Ef38dpZqY8T9/FauTrJ/bv7zknukuRBVXWPJM/LpI8PSnJFkiOH449MckV33yHJC4fjGLdSHyfJ703dx+cMbf6u2HHHJPn01Pamv4+Fpvm4e5Lzu/uz3f3NJG9IcugG17SVHZrkhOH1CUkesYG1bDrd/cEkl2/TvFKfHprkNT3x4SR7VNWt16fSzWuFPl7JoUne0N1Xd/fnkpyfyd8prKC7L+nus4fXV2XyP+r94j5eMyN9vBL38XYa7sevDZs3HL46yf2TvHlo3/Y+Xrq/35zkAVVV61TupjTSxyvxd8UOqKr9k/x8klcM25UtcB8LTfOxX5ILp7Yvyvj/XJhdJ3lPVZ1VVUcNbbfq7kuSyf/Yk+y7YdVtHSv1qXt7bf3GMOXjlfXdaaX6+HoYpnb8RJLT4z6ei236OHEfr5lhStM5SS5N8t4k/5bkK919zXDIdD9+p4+H/VcmueX6Vrz5bNvH3b10H//RcB+/sKpuPLS5j3fMXyR5SpJrh+1bZgvcx0LTfCyXkC1TuDbu1d13zWTI/OiquvdGF7STcW+vnZcm+cFMpohckuT5Q7s+3kFVdfMkb0nyW9391bFDl2nTxzNYpo/dx2uou7/d3XdJsn8mI3M/stxhw3d9vAO27eOq+rEkT0/yw0nulmSvJE8dDtfH26mqHprk0u4+a7p5mUM33X0sNM3HRUluO7W9f5KLN6iWLaW7Lx6+X5rkbZn8T+WLS8Plw/dLN67CLWOlPnVvr5Hu/uLwP+9rk7w83526pI93QFXdMJNf5l/X3W8dmt3Ha2i5PnYfz0d3fyXJaZk8P7ZHVS19ruZ0P36nj4f935fZpwHv9Kb6+EHD9NPu7quTvCru4+vjXkkeXlWfz+TxlPtnMvK06e9joWk+zkhy0LBSyI0yeRj25A2uadOrqt2qavel10l+NsknMunbw4fDDk9y0sZUuKWs1KcnJ/nlYUWheyS5cmn6E9tnm3nx/yOTezmZ9PFjhhWFbp/JA8gfWe/6NpNh/vvxST7d3S+Y2uU+XiMr9bH7eO1U1T5Vtcfw+qZJHpjJs2PvT/Ko4bBt7+Ol+/tRSd7XPnxz1Ap9/C9T/7hSmTxrM30f+7tiO3T307t7/+4+MJPff9/X
3Y/LFriPd139ELZXd19TVb+R5N1Jdknyyu7+5AaXtRXcKsnbhucDd01yYnf/XVWdkeRNVXVkkguSPHoDa9x0qur1Se6bZO+quijJsUn+NMv36SlJHpLJQ93fSHLEuhe8Ca3Qx/cdlrXtJJ9P8sQk6e5PVtWbknwqkxXLju7ub29E3ZvIvZI8Psm5w7MKSfKMuI/X0kp9/Fj38Zq5dZIThlUGb5DkTd39zqr6VJI3VNVzk3w0k/Ca4ftrq+r8TP5l/jEbUfQms1Ifv6+q9slkqtg5SX59ON7fFWvnqdnk93EtaJgDAABYCKbnAQAAjBCaAAAARghNAAAAI4QmAACAEUITAADACKEJgHVRVV1Vz5/a/t2qetZ2nuNr23n856vq3OHrU1X13Kq68facAwCEJgDWy9VJHllVe6/zde/X3f8tyd2T/ECS49bipFOfbg/AFic0AbBerskksPz2tjuq6nZVdWpVfXz4fsDQfvuq+ueqOqOqnrPNe35vaP94VT17tYt399cy+dDKR1TVXmPnqKr/XVX/UlXvrarXV9XvDu2nVdUfV9UHkhxTVftU1VuGc5xRVfcajtutql45tH20qg7d4V4DYMMJTQCsp5ckeVxVfd827X+V5DXd/eNJXpfkxUP7i5K8tLvvluQLSwdX1c8mOSiT0aO7JPnvVXXv1S7e3V9N8rkkB610jqo6OMkvJPmJJI9McvA2p9mju+/T3c8f6nvhUN8vJHnFcMzvJ3nf0H6/JH9WVbutVh8Ai8nUAgDWTXd/tapek+Q3k/zn1K57ZhJQkuS1Sf7P8PpemYSRpfbnDa9/dvj66LB980wC0AdnKKNWOcfuSU7q7v9Mkqp6xzbvf+PU6wcmuVPV0ilzi6rafTjvw5dGqJLcJMkBST49Q30ALBihCYD19hdJzk7yqpFjeoXXSyrJn3T3y7bnwkOgOTDJv650jqq6zvTBbXx96vUNktxzKWBNnaOS/EJ3f2Z76gNgMZmeB8C66u7Lk7wpyZFTzf+U5DHD68cl+dDw+h+3aV/y7iS/WlU3T5Kq2q+q9h277nDs/03y9u6+YuQcH0rysKq6ybDv50dO+54kvzF1jbtM1fekITylqn5irDYAFpvQBMBGeH6S6VX0fjPJEVX18SSPT3LM0H5MkqOr6owk33kOqrvfk+TEJP9cVecmeXMm0+qW8/6q+kSSjyS5IMkTx87R3WckOTnJx5K8NcmZSa5c4dy/meTgYSGJT2Wy0ESSPCfJDZN8fLj2c1Z4PwCbQHUvN+sBAHZeVXXz7v5aVd0sk+ekjurusze6LgA2hmeaAOC6jquqO2WygMMJAhPAzs1IEwAAwAjPNAEAAIwQmgAAAEYITQAAACOEJgAAgBFCEwAAwAihCQAAYMT/D1XgYDa9U+DHAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 1008x576 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# degree of every gene in the PPI network\n",
    "node_degrees = np.array([degree for _, degree in G.degree()])\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(14, 8))\n",
    "# unit-width bins with edges 1, 2, ..., 400; genes with a higher degree are not drawn\n",
    "bins = np.linspace(1, 400, 400)\n",
    "# Plain count histogram. This replaces sns.distplot(..., kde=False), which has been\n",
    "# deprecated since seaborn 0.11; alpha=0.4 reproduces distplot's default bar opacity.\n",
    "ax.hist(node_degrees, bins=bins, alpha=0.4)\n",
    "\n",
    "ax.set_xlabel('Node Degree')\n",
    "ax.set_ylabel('Number of genes with node degree')\n",
    "ax.set_title('Node Degree Distribution of Genes in PPI')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "### Store Adjacency matrix on Disk"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "# Store the adjacency matrix together with the node (gene) names in one HDF5 container.\n",
    "# NOTE(review): the file name says 'ens', but the matrix above is built from the\n",
    "# gene-symbol edgelist - confirm that this is the intended name.\n",
    "string_dt = h5py.special_dtype(vlen=str)  # variable-length string dtype for the names\n",
    "# The context manager closes the file even if writing one of the datasets fails.\n",
    "with h5py.File('../data/networks/CPDB_ppi_ens.h5', 'w') as f:\n",
    "    f.create_dataset('consensusPathDB_ppi', data=adjacency_matrix, shape=adjacency_matrix.shape)\n",
    "    f.create_dataset('gene_names', data=node_names, dtype=string_dt)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
